# Simulation Washout
## Show that data from aggregated preferences get to Benford

## Define an individual digit preference as a benford distribution where some digits
## are replaced with a preferred digit

digitpreference <- function(n){
    # For sample size n, start with benford numbers
    randommax = floor(10^(5*runif(1)))
    raw = floor(runif(n, min =1, max = randommax))
    
    # Suspects prefer 2s and 8s 
    x <- c(2,8)

    # With 20% probability for each obs, replace a random digit in the number
    # for a preferred digit

    for(i in 1:n){
      if(runif(1) < 0.2){
        # Turn number into string
        num = toString(raw[i])
        # Find position to replace
        pos = sample(1:nchar(num), 1)
        
        replace = sample(x, 1)
        
        substr(num, pos, pos) = toString(replace)
        raw[i] = as.numeric(num)
      } 
    }
    return(raw)
}



# Create data that are sums of products of numbers with integers
n <- 10000

D = data.frame(matrix(ncol = 0, nrow = n))
D$money <- 0
for(i in 1:n){
	print(i)
	lineitems <- floor(runif(1, min =1, max = 100))
	price <- digitpreference(lineitems)
	
	maxquantity = floor(10^(2*runif(1)))
	quantity <-  floor(runif(lineitems, min = 1, max = maxquantity))
	D$money[i] <- sum(price* quantity)

	#print(price)
	#print(quantity)
	#print(D$money[i])
	
}
n <- 1000

# Look at values
#summary(D$money)
#hist(D$money)

# A necessary feature for processing: some categorical column
D$District <- "A"

#Run our tests against it
#library(devtools)
#install_github("jlederluis/digitanalysis", force = TRUE)

library(digitanalysis)
library(ggplot2)

Data <- process_digit_data(raw_df = D, digit_columns = c("money"))

#######################################

# First digit test
first_digit = all_digits_test(digitdata = Data, digit_places = 1, omit_05 = 0, plot=TRUE)
first_digit$p_values
first_digit$plot$AllBreakout


#All digits test except first with expenditure
all_digits = all_digits_test(digitdata = Data, data_columns = 'money', skip_first_digit = TRUE, plot = TRUE)
all_digits$plots$AllBreakout
all_digits$p_values


pdf("First.pdf")
first_digit$plot$AllBreakout$AllCategory$aggregate_barplot + ggtitle("A")
dev.off()

pdf("All.pdf")
all_digits$plot$AllBreakout$AllCategory$aggregate_barplot  + ggtitle("B")
dev.off()


sink(file = "P-Values.txt")

print("First Digits")
first_digit$p_values


print("AllDigits")
all_digits$p_values

sink()
